%% Compute statistics based on the results of the counterfactual experiment.

clear
cd ../china/data
load trade_share.raw; 
load hs10_indlist.raw;
indN=length(hs10_indlist);


%%%%%%%%% This stuff compares the product-weighted import price index in the data, the MC estimation with the estimated parameters, and the MC estimation with the parameters cut in half



ZZ=50;

priceratio_all=zeros(indN,3);
priceratio_weight=zeros(indN,3);

gen_data_WA_weight=zeros(ZZ,2);
gen_data_med_weight=zeros(ZZ,2);
true_data_weight=zeros(ZZ,1);


WA_price_all=zeros(1000,ZZ);
WA_price_weight=zeros(1000,ZZ);

Medprice_all=zeros(1000,ZZ);
Medprice_weight=zeros(1000,ZZ);


Z=zeros(ZZ,1);


for w=1:50 


folder=sprintf('../china/estimation/hs10/biggest/raw_%d',hs10_indlist(w,1))




cd (folder)

load imp.raw;
load firmshare.raw
load price_maxyear.raw
load CF1_halfcost_biggest.mat

I1=imp(:,3);

M=length(imp);


%load priceratio_final.mat


priceratio=priceratio(1:M,:);

priceratio=[priceratio(:,1) priceratio(:,3)];

% Observed prices
true_data=zeros(M,1);
for i=1:M
  true_data(i,1)=exp(price_maxyear(I1(i,1)));
end

%% Take the median across firms in an HS
true_data_med(w,:)=median(true_data,1);


%% Weight prices by firm's import share
true_data_firm=zeros(M,1);
for j=1:M
true_data_firm(j,:)=(firmshare(j,1).*true_data(j,1))/sum(firmshare);
end
true_data_WA(w,1)=sum(true_data_firm);

% Generated prices (both with actual params and adjusted params)
gen_data=zeros(M,2);
for i=1:M
  gen_data(i,:)=priceratio(i,:).*true_data(i,1);
end

%% Take the median across firms in an HS
gen_data_med(w,:)=median(gen_data,1);

%% Weight prices by firm's import share
gen_data_firm_WA=zeros(M,2);
for j=1:M
gen_data_firm_WA(j,:)=(firmshare(j,1).*gen_data(j,:))/sum(firmshare);
end
gen_data_WA(w,:)=sum(gen_data_firm_WA,1);


% How to summarize prices across products?  Weight by the importance of the product.  Do for all 4 measures above.
true_data_WA_weight(w,1)=(true_data_WA(w,1).*trade_share(w,1))/sum(trade_share);
true_data_med_weight(w,1)=(true_data_med(w,1).*trade_share(w,1))/sum(trade_share);

gen_data_WA_weight(w,:)=(gen_data_WA(w,:).*trade_share(w,1))/sum(trade_share);
gen_data_med_weight(w,:)=(gen_data_med(w,:).*trade_share(w,1))/sum(trade_share);


%% Now for a different exercise.  Take advantage of the MC estimation results.
% Make 1000 import price indexes, always weighting by product trade share.
load CF1_halfcost_biggest.mat

% For each MC run in a product, take the WA across firms for the true data...
WA_price_all(:,w)=WAprice_firms(:,1);
WA_price_weight(:,w)=(WA_price_all(:,w).*trade_share(w,1))/sum(trade_share);

% and the generated data.
WA_price_all2(:,w)=WAprice_firms(:,2);
WA_price_weight2(:,w)=(WA_price_all2(:,w).*trade_share(w,1))/sum(trade_share);

% We also have the true data point.
WA_true(:,w)=sum(firmshare.*exp(price_maxyear(I1)));
WA_true_weight(:,w)=(WA_true(:,w).*trade_share(w,1))/sum(trade_share);


% For each MC run in a product, use the median across firms for the true data...
Medprice_all(:,w)=Medprice_firms(:,1);
Medprice_weight(:,w)=(Medprice_all(:,w).*trade_share(w,1))/sum(trade_share);

% and the generated data.
Medprice_all2(:,w)=Medprice_firms(:,2);
Medprice_weight2(:,w)=(Medprice_all2(:,w).*trade_share(w,1))/sum(trade_share);

% We also have the true data point.
Medprice_true(:,w)=median(exp(price_maxyear(I1)));
Medprice_true_weight(:,w)=(Medprice_true(:,w).*trade_share(w,1))/sum(trade_share);


end

% Summarize the actual change in prices.
Q=[sum(gen_data_WA_weight) sum(true_data_WA_weight)]
[Q(1)/Q(3) (Q(2)/Q(1))-1]

Q2=[median(gen_data_WA) median(true_data_WA)] 
[Q2(1)/Q2(3) (Q2(2)/Q2(1))-1]

R=[sum(gen_data_med_weight) sum(true_data_med_weight)]
[R(1)/R(3) (R(2)/R(1))-1]

R2=[median(gen_data_med) median(true_data_med)]
[R2(1)/R2(3) (R2(2)/R2(1))-1]

% Summarize the distribution of price indices
cd /projects/programs/monarch/china/china_updated/estimation/results
disn=[sum(WA_price_weight,2), sum(WA_price_weight2,2)];
median(disn)
mean(disn)
sqrt(var(disn))
sum(WA_true_weight)
%dlmwrite('disn.txt',disn);

disn2=[sum(Medprice_weight,2), sum(Medprice_weight2,2)];
median(disn2)
mean(disn2)
sqrt(var(disn2))
sum(Medprice_true_weight)
%dlmwrite('disn2.txt',disn2);



ModelFit=[sum(gen_data_WA_weight(:,1))/sum(true_data_WA_weight) sum(gen_data_med_weight(:,1))/sum(true_data_med_weight) ]

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Same analysis for change in quality 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

ZZ=50;

gen_data_WA_weight=zeros(ZZ,2);


Z=zeros(ZZ,1);



for w=1:50


folder=sprintf('../china/estimation/hs10/biggest/raw_%d',hs10_indlist_finalparam(w,1))



cd (folder)

load imp.raw;
load firmshare.raw
load quality_MC.mat

I1=imp(:,3);

M=length(imp);

% Generated prices (both with actual params and adjusted params)
%gen_data=zeros(M,2);
%for i=1:M
%  gen_data(i,:)=Pquality_med(i,:);
%end

gen_data=Pquality_med;


%% Weight prices by firm's import share
gen_data_firm_WA=zeros(M,2);
for j=1:M
gen_data_firm_WA(j,:)=(firmshare(j,1).*gen_data(j,:))/sum(firmshare);
end
gen_data_WA(w,:)=sum(gen_data_firm_WA,1);

gen_data_WA_weight(w,:)=(gen_data_WA(w,:).*trade_share(w,1))/sum(trade_share);

end

% Summarize the actual change in prices.
Q=[sum(gen_data_WA_weight)]
[(Q(2)-Q(1))]

qual_min = 3.55; % From alldata_0506, cleaned at 5% and 95%
[ (Q(2)-Q(1) ) / (Q(1)+3.55) ]


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Number of Firm Stuff


firmswitch=zeros(50,6);
firmcswitch=zeros(50,6);



for w=1:50 

folder=sprintf('../china/estimation/hs10/biggest/raw_%d',hs10_indlist(w,1))


cd (folder)

load imp.raw;
load CF1_halfcost_biggest.mat


M=length(imp);

Y1=truth_switching(1,1);
% MC_switch is 1000 x 10, and is the percentage of firms switching in each MC run, the % of firms city switching in each run.
% So I should look at generated data only (first col), and take the mean or median.
Y2=median(MC_switch(:,1));
Y3=mean(MC_switch(:,1));
Y4=median(MC_switch(:,2));
Y5=mean(MC_switch(:,2));
Y6=1;

% Translate that into a number of firms
firmswitch(w,:)=M.*[Y1 Y2 Y3 Y4 Y5 Y6];


Z1=truth_switching(1,2);
% MC_switch is 1000 x 5, and is the percentage of firms switching in each MC run.
% So I should look at generated data only (first col), and take the mean or median.
Z2=median(MC_switch(:,3));
Z3=mean(MC_switch(:,3));
Z4=median(MC_switch(:,4));
Z5=mean(MC_switch(:,4));
Z6=1;

% Translate that into a number of firms
firmcswitch(w,:)=M.*[Z1 Z2 Z3 Z4 Z5 Z6];



end

total_switching=sum(firmswitch,1);
data_switching=total_switching(:,1)/total_switching(:,6)
gen_data_switching=total_switching(:,2)/total_switching(:,6)
cf_switching=total_switching(:,4)/total_switching(:,6)

total_cswitching=sum(firmcswitch,1);
data_switching=total_cswitching(:,1)/total_cswitching(:,6)
gen_data_cswitching=total_cswitching(:,2)/total_cswitching(:,6)
cf_cswitching=total_cswitching(:,4)/total_cswitching(:,6)

%%%%%%%%%%%%%%%